% Cluster analysis on crisis data (script).
% This first part resets the command window and the workspace and then
% defines the settings and the variable groups used further down.
clc
clear


% ---- Settings ----
% distance metric for clustering, e.g. 'euclidean','cityblock'
metric = 'euclidean';
% method for computing linkages, e.g. 'complete','centroid','median','single','weighted'
method = 'complete';


% ---- Variable groups ----
% vartypes holds the NAMES of the group variables defined below that enter
% the analysis. Each group is a cell array of variable codes that are
% compared with the column labels of the input file. The commented-out
% lines are alternative selections kept for quick switching.
%vartypes={'outputloss','ctychar','crchar','boom','vulnerability','polrep','external'};
vartypes = {'ctychar', 'crchar', 'boom', 'vulnerability', 'polrep', 'external'};
%vartypes={'ctychar'};

outputloss = {'LEN', 'DEP', 'ACMLOSS', 'ACMPLOSS'};
%outputloss={};

ctychar = {'PCY', 'ACRGDP'};
%ctychar={};

crchar = {'YAGDP', 'YARCR', 'YARBM', 'DCUR', 'DSD', 'ARINTR3M'};
%crchar={};

boom = {'YBGDP', 'YBRCR', 'YBRBM', 'GAPRCR', 'GAPRBM', 'BRINTR3M', 'BINTR3M'};
%boom={};

%vulnerability={'FBRGDP','CABRGDP','NFARBM'};
vulnerability = {'FBRGDP', 'CABRGDP'};
%vulnerability={};

polrep = {'DF', 'BH', 'BG', 'LS', 'F', 'LGI', 'BN', 'AMC'};
%polrep={};

external = {'YPRGDP', 'YWLDY', 'CRISISWLD', 'CRISISREG'};
%external={};

% codes that are in none of the groups above (dropped is not listed in vartypes)
dropped = {'LSRD', 'YBSP', 'YBRPN', 'GAPSP', 'GAPRPN', 'PDRGDP', 'GAPREX', ...
           'BCRTA', 'BM', 'SF', 'BKRA', 'RCGRGDP', 'RAI', 'VIX'};

% loading data
% first output: numeric data, second output: text cells of the sheet
[data, labels] = xlsread('infile');
[k, l] = size(data);


% creating labels
% NOTE(review): the offsets below presume a fixed sheet layout (variable
% codes in the first text row starting at column 2, crisis names starting
% at the 4th text cell) - confirm against the input file.
varlabel = labels(1, 2:l+1);     % one label per data column
crlabel  = labels(4:k+3);        % linear indexing: k cells, one per data row
type     = zeros(1, l);          % group index per column, 0 = not used
weights  = zeros(1, l);          % weight per column, filled in later

% creating dataset with rescaled variables for use in clustering
% (subtract the column mean and divide by the column standard deviation,
% both taken from the helper functions meannonans / stdnonans)
colmean = repmat(meannonans(data), k, 1);
colstd  = repmat(stdnonans(data), k, 1);
rdata   = (data - colmean) ./ colstd;




% clustering with small dataset
% ===================================
% Tags every data column with the index of the variable group it belongs to
% (type) and assigns it the weight 1/(number of variables listed in that
% group). Columns that are in no selected group keep type 0 and weight 0.

for h=1:length(vartypes)
    groupname=char(vartypes(h));
    % the entry must be the name of an existing workspace variable; checking
    % this first keeps eval from executing arbitrary text
    if ~isvarname(groupname) || exist(groupname,'var')~=1
        error('Variable group ''%s'' listed in vartypes is not defined.',groupname);
    end
    vars=eval(groupname);    % fetch the cell array named by vartypes(h)
    disp([vartypes(h),vars])


    if ~isempty(vars)
        for hh=1:length(vars)
            ind=find(strcmp(vars(hh),varlabel));
            if isempty(ind)
                % without this message a misspelt or missing code is dropped silently
                warning('Variable ''%s'' of group ''%s'' was not found in the column labels and is ignored.',char(vars(hh)),groupname);
            end
            type(ind)=h;
        end
        weights(type==h)=1/length(vars);
    end
end


% creating small dataset without NaNs
keepvar=find(type>0);                           % finds variables to be used for analysis
weights=weights(keepvar);
varsmall=varlabel(keepvar);
% finds crises with COMPLETE data: the row mean is NaN as soon as one of the
% kept variables is missing. The mean is taken explicitly along dimension 2;
% mean() of the transposed data would collapse to a single scalar when only
% one variable is kept and the row filter would then be wrong.
keepobs=find(~isnan(mean(data(:,keepvar),2))).';   % row vector of row indices
datasmall=data(keepobs,keepvar);
rdatasmall=rdata(keepobs,keepvar);
crsmall=crlabel(keepobs);


% compute distance
% every column of the rescaled data is multiplied by its weight before the
% pairwise distances between the crises are computed
[nobs,nvars]=size(rdatasmall);
distance=pdist((ones(nobs,1)*weights).*rdatasmall,metric);



% computing dendrogramme
% the tree is built with the configured linkage method, so that the plot
% really shows the method named in its title (linkage(distance) alone would
% fall back to the default method)
figure
dendrogram(linkage(distance,method),nobs,'labels',crsmall,'orientation','right')
title(['Distance metric: ',metric,'   Method: ',method])
xlabel(vartypes)    


% cluster analysis
% For every number of clusters i from 2 to nobs: cut the tree into at most i
% clusters, draw the silhouette plot in a new figure, store the mean
% silhouette value in silh(i) and list the members of each cluster.
tree=linkage(distance,method);   % built once, with the configured method
clust=zeros(nobs,nobs);          % column i: cluster index of every crisis
silh=zeros(1,nobs);              % silh(1) stays 0, the loop starts at 2
for i=2:nobs

    clust(:,i)=cluster(tree,'maxclust',i);

    figure
    % a single call is enough: with two outputs silhouette returns the
    % values and also draws the plot
    [s,hsil]=silhouette([],clust(:,i),distance);
    title('Silhouette coefficients')
    silh(i)=mean(s);

    disp(' ')
    disp(['# of clusters: ',num2str(i)])
    for ii=1:i
        disp(' ')
        disp(['Cluster ',num2str(ii)])
        ind=find(clust(:,i)==ii);
        disp(crsmall(ind))

    end


end

% mean silhouette value against the number of clusters
% silh(1) is never computed (the loop over the number of clusters starts at
% 2), so it is left out instead of being drawn as an artificial 0
figure
plot(2:length(silh),silh(2:end))
title('Silhouette coefficients')
xlabel('# of clusters')


% final partition: tree built with the configured method, cut into at most 5 clusters
indclust=cluster(linkage(distance,method),'maxclust',5);

disp(['Distance metric: ',metric])

% for each cluster: list its members, then print for every variable the mean
% and, in parentheses, the standard deviation of the unscaled data
nfound=max(indclust);
for cl=1:nfound
    members=find(indclust==cl);

    disp(' ')
    disp(['Cluster ',num2str(cl)])
    disp(crsmall(members))
    disp(' ')

    for v=1:length(varsmall)
        column=datasmall(members,v);
        avgtxt=num2str(mean(column));
        sdtxt=num2str(std(column));
        disp([char(varsmall(v)),': ',avgtxt,' (',sdtxt,')'])
    end
end
